Loading data

We are going to look at NYC Restaurant Inspection Results.

library(tidyverse)
## -- Attaching packages --------- tidyverse 1.3.0 --
## √ ggplot2 3.3.2     √ purrr   0.3.4
## √ tibble  3.0.3     √ dplyr   1.0.2
## √ tidyr   1.1.2     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0
## -- Conflicts ------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(httr)
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:httr':
## 
##     config
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Download every row from a paged JSON endpoint, one page per request.
#
# Arguments:
#   url        - endpoint to query; passed unchanged to GET().
#   chunk_size - number of rows requested per page (default 50000, the value
#                this function always used before it became a parameter).
#
# Returns a list holding one tibble per page, in the order the pages were
# fetched. Callers combine the pages themselves (e.g. with bind_rows()).
#
# Stops with an error as soon as a request comes back with an HTTP error
# status, instead of trying to parse the error body as data.
get_all_inspections = function(url, chunk_size = 50000) {
  
  # Send the page size as an integer: large doubles are converted to text in
  # scientific notation (e.g. "1e+05"), which is not a usable query value.
  chunk_size = as.integer(chunk_size)
  stopifnot(length(chunk_size) == 1, !is.na(chunk_size), chunk_size > 0)
  
  all_inspections = list()
  
  loop_index = 1
  DO_NEXT = TRUE
  
  while (DO_NEXT) {
    message("Getting data, page ", loop_index)
    
    # NOTE(review): zipcode is not a unique key, so rows sharing a zipcode may
    # not come back in the same order on every request. Consider adding a
    # unique tie-breaker to `$order` -- confirm against the API documentation.
    response = 
      GET(url,
          query = list(`$order` = "zipcode",
                       `$limit` = chunk_size,
                       `$offset` = as.integer((loop_index - 1) * chunk_size)
                       )
          )
    
    # Fail loudly on 4xx/5xx; otherwise an error payload would be parsed as a
    # short page and quietly end the loop with incomplete data.
    stop_for_status(response)
    
    all_inspections[[loop_index]] = 
      response %>%
      content("text", encoding = "UTF-8") %>%
      fromJSON() %>%
      as_tibble()
    
    # A page shorter than chunk_size means there is nothing left to fetch.
    DO_NEXT = nrow(all_inspections[[loop_index]]) == chunk_size
    loop_index = loop_index + 1
  }
  
  all_inspections
  
}

# Endpoint for the inspection results used throughout this document.
url = "https://data.cityofnewyork.us/resource/43nn-pn8j.json"

# Fetch all pages and stack them into a single data frame.
nyc_inspections = bind_rows(get_all_inspections(url))
## Getting data, page 1
## Getting data, page 2
## Getting data, page 3
## Getting data, page 4
## Getting data, page 5
## Getting data, page 6
## Getting data, page 7
## Getting data, page 8
## Getting data, page 9

Filter

# Keep only the inspections recorded on 2019-02-21 (the date is compared as
# the exact timestamp string stored in inspection_date), then narrow the data
# down to the columns used by the plots below.
tidy_inspections =
  nyc_inspections %>%
  filter(inspection_date == "2019-02-21T00:00:00.000") %>%
  select(
    phone,
    cuisine_description,
    inspection_date,
    score,
    latitude,
    longitude,
    building
  )

Plotly plots

Scatter plot

# Scatter plot of restaurant locations, colored by building number, with a
# hover label showing cuisine, score and phone number.
tidy_inspections %>%
  mutate(
    # Presumably the JSON API delivers coordinates as text (confirm); convert
    # them so plotly draws continuous axes rather than one category per
    # distinct string. as.numeric() is a no-op if they are already numeric.
    latitude = as.numeric(latitude),
    longitude = as.numeric(longitude),
    text_label = str_c("Cuisine: ", cuisine_description, "\nScore: ", score, "\nNumber: ", phone)
  ) %>% 
  
  # Longitude on x and latitude on y, so the points are laid out the way they
  # would appear on a map instead of transposed.
  plot_ly(
    x = ~longitude, y = ~latitude, color = ~building, alpha = 0.5,
    text = ~text_label, type = "scatter", mode = "markers")
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: Ignoring 3 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

Boxplot

# Count how many inspections received each score (score treated as a factor),
# order the score levels by that count, and draw one box per score level.
# NOTE(review): after count() there is a single n per score, so every box is
# built from one value -- confirm this is the intended plot.
tidy_inspections %>%
  count(score = as.factor(score)) %>%
  mutate(score = fct_reorder(.f = score, .x = n)) %>%
  plot_ly(
    type = "box",
    x = ~score,
    y = ~n,
    color = ~score,
    colors = "viridis"
  )
## Warning: Ignoring 1 observations

Barplot

# Bar chart of the number of inspections per building value; bars are also
# colored by that count.
plot_ly(
  data = count(tidy_inspections, building),
  type = "bar",
  x = ~building,
  y = ~n,
  color = ~n,
  colors = "viridis"
)
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays